{ "cells": [ { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "import scipy.cluster.hierarchy as shc\n", "\n", "from sklearn.preprocessing import MinMaxScaler\n", "\n", "from sklearn.cluster import AgglomerativeClustering\n", "from sklearn.cluster import KMeans\n", "\n", "from sklearn.metrics import confusion_matrix\n", "from sklearn.metrics import silhouette_score\n", "\n", "from sklearn import datasets\n", "\n", "%matplotlib inline\n", "pd.set_option(\"display.max_columns\", None)" ] }, { "cell_type": "markdown", "metadata": {}, "source": [ "# Lab 22 - Determining the number of clusters\n", "\n", "We will look at two methods for determining the number of clusters. \n", "\n", "## Inertia and the elbow method\n", "The first method assmes you have centers for the clusters, as in k-means clustering. It computes the sum of the squared distances of samples to their closest cluster center.\n", "\n", "We'll load the iris dataset, as in Lab 20." ] }, { "cell_type": "code", "execution_count": 2, "metadata": { "scrolled": true }, "outputs": [ { "data": { "text/html": [ "
\n", " | sepal length (cm) | \n", "sepal width (cm) | \n", "petal length (cm) | \n", "petal width (cm) | \n", "
---|---|---|---|---|
0 | \n", "5.1 | \n", "3.5 | \n", "1.4 | \n", "0.2 | \n", "
1 | \n", "4.9 | \n", "3.0 | \n", "1.4 | \n", "0.2 | \n", "
2 | \n", "4.7 | \n", "3.2 | \n", "1.3 | \n", "0.2 | \n", "
3 | \n", "4.6 | \n", "3.1 | \n", "1.5 | \n", "0.2 | \n", "
4 | \n", "5.0 | \n", "3.6 | \n", "1.4 | \n", "0.2 | \n", "